{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7b592537-9da1-48fd-ad83-c47ffd422a66",
   "metadata": {},
   "source": [
    "## 分类算法  LogisticRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "105b6191-47ac-49ae-8d89-0ae0a5d5886d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.metrics import auc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "6dd8f169-b9e3-4947-a063-07d2ed4fc188",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking</th>\n",
       "      <th>duration</th>\n",
       "      <th>history</th>\n",
       "      <th>purpose</th>\n",
       "      <th>amount</th>\n",
       "      <th>savings</th>\n",
       "      <th>employed</th>\n",
       "      <th>installp</th>\n",
       "      <th>marital</th>\n",
       "      <th>coapp</th>\n",
       "      <th>...</th>\n",
       "      <th>property</th>\n",
       "      <th>age</th>\n",
       "      <th>other</th>\n",
       "      <th>housing</th>\n",
       "      <th>existcr</th>\n",
       "      <th>job</th>\n",
       "      <th>depends</th>\n",
       "      <th>telephon</th>\n",
       "      <th>foreign</th>\n",
       "      <th>good_bad</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1169</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>67</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>48</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>5951</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>2096</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>49</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>7882</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>45</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>24</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>4870</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>53</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   checking  duration  history purpose  amount  savings  employed  installp  \\\n",
       "0         1         6        4       3    1169        5         5         4   \n",
       "1         2        48        2       3    5951        1         3         2   \n",
       "2         4        12        4       6    2096        1         4         2   \n",
       "3         1        42        2       2    7882        1         4         2   \n",
       "4         1        24        3       0    4870        1         3         3   \n",
       "\n",
       "   marital  coapp  ...  property  age  other  housing  existcr  job  depends  \\\n",
       "0        3      1  ...         1   67      3        2        2    3        1   \n",
       "1        2      1  ...         1   22      3        2        1    3        1   \n",
       "2        3      1  ...         1   49      3        2        1    2        2   \n",
       "3        3      3  ...         2   45      3        3        1    3        2   \n",
       "4        3      1  ...         4   53      3        3        2    3        2   \n",
       "\n",
       "   telephon  foreign  good_bad  \n",
       "0         2        1      good  \n",
       "1         1        1       bad  \n",
       "2         1        1      good  \n",
       "3         1        1      good  \n",
       "4         1        1       bad  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 利用pandas导入csv数据，查看前5行导入结果看是否正常\n",
    "import pandas as pd\n",
    "# 'D:/work/machine-learning-master/datasets/credit/credit.csv'\n",
    "credit_df = pd.read_csv(\"../machine-learning-master/datasets/credit/credit.csv\")\n",
    "credit_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "d1a33cd0-d645-499c-ad66-d3ec4138b692",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking</th>\n",
       "      <th>duration</th>\n",
       "      <th>history</th>\n",
       "      <th>purpose</th>\n",
       "      <th>amount</th>\n",
       "      <th>savings</th>\n",
       "      <th>employed</th>\n",
       "      <th>installp</th>\n",
       "      <th>marital</th>\n",
       "      <th>coapp</th>\n",
       "      <th>...</th>\n",
       "      <th>property</th>\n",
       "      <th>age</th>\n",
       "      <th>other</th>\n",
       "      <th>housing</th>\n",
       "      <th>existcr</th>\n",
       "      <th>job</th>\n",
       "      <th>depends</th>\n",
       "      <th>telephon</th>\n",
       "      <th>foreign</th>\n",
       "      <th>good_bad</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1169</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>67</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>48</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>5951</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>2096</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>49</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>7882</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>45</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>good</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>24</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>4870</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>53</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>bad</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   checking  duration  history purpose  amount  savings  employed  installp  \\\n",
       "0         1         6        4       3    1169        5         5         4   \n",
       "1         2        48        2       3    5951        1         3         2   \n",
       "2         4        12        4       6    2096        1         4         2   \n",
       "3         1        42        2       2    7882        1         4         2   \n",
       "4         1        24        3       0    4870        1         3         3   \n",
       "\n",
       "   marital  coapp  ...  property  age  other  housing  existcr  job  depends  \\\n",
       "0        3      1  ...         1   67      3        2        2    3        1   \n",
       "1        2      1  ...         1   22      3        2        1    3        1   \n",
       "2        3      1  ...         1   49      3        2        1    2        2   \n",
       "3        3      3  ...         2   45      3        3        1    3        2   \n",
       "4        3      1  ...         4   53      3        3        2    3        2   \n",
       "\n",
       "   telephon  foreign  good_bad  \n",
       "0         2        1      good  \n",
       "1         1        1       bad  \n",
       "2         1        1      good  \n",
       "3         1        1      good  \n",
       "4         1        1       bad  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 利用pandas导入csv数据，查看前5行导入结果看是否正常\n",
    "import pandas as pd\n",
    "'D:/work/machine-learning-master/datasets/credit/credit.csv'\n",
    "credit_df = pd.read_csv('D:/work/machine-learning-master/datasets/credit/credit.csv')\n",
    "credit_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "97773af0-4ed5-418d-b3ba-abe609c09738",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1000 entries, 0 to 999\n",
      "Data columns (total 21 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   checking  1000 non-null   int64 \n",
      " 1   duration  1000 non-null   int64 \n",
      " 2   history   1000 non-null   int64 \n",
      " 3   purpose   1000 non-null   object\n",
      " 4   amount    1000 non-null   int64 \n",
      " 5   savings   1000 non-null   int64 \n",
      " 6   employed  1000 non-null   int64 \n",
      " 7   installp  1000 non-null   int64 \n",
      " 8   marital   1000 non-null   int64 \n",
      " 9   coapp     1000 non-null   int64 \n",
      " 10  resident  1000 non-null   int64 \n",
      " 11  property  1000 non-null   int64 \n",
      " 12  age       1000 non-null   int64 \n",
      " 13  other     1000 non-null   int64 \n",
      " 14  housing   1000 non-null   int64 \n",
      " 15  existcr   1000 non-null   int64 \n",
      " 16  job       1000 non-null   int64 \n",
      " 17  depends   1000 non-null   int64 \n",
      " 18  telephon  1000 non-null   int64 \n",
      " 19  foreign   1000 non-null   int64 \n",
      " 20  good_bad  1000 non-null   object\n",
      "dtypes: int64(19), object(2)\n",
      "memory usage: 164.2+ KB\n"
     ]
    }
   ],
   "source": [
    "credit_df.info()    #检查文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "f574f8e6-493c-4b46-bc5f-933b61021587",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking</th>\n",
       "      <th>duration</th>\n",
       "      <th>history</th>\n",
       "      <th>amount</th>\n",
       "      <th>savings</th>\n",
       "      <th>employed</th>\n",
       "      <th>installp</th>\n",
       "      <th>marital</th>\n",
       "      <th>coapp</th>\n",
       "      <th>resident</th>\n",
       "      <th>property</th>\n",
       "      <th>age</th>\n",
       "      <th>other</th>\n",
       "      <th>housing</th>\n",
       "      <th>existcr</th>\n",
       "      <th>job</th>\n",
       "      <th>depends</th>\n",
       "      <th>telephon</th>\n",
       "      <th>foreign</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.00000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.00000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.577000</td>\n",
       "      <td>20.903000</td>\n",
       "      <td>2.54500</td>\n",
       "      <td>3271.258000</td>\n",
       "      <td>2.105000</td>\n",
       "      <td>3.384000</td>\n",
       "      <td>2.973000</td>\n",
       "      <td>2.68200</td>\n",
       "      <td>1.145000</td>\n",
       "      <td>2.845000</td>\n",
       "      <td>2.358000</td>\n",
       "      <td>35.546000</td>\n",
       "      <td>2.675000</td>\n",
       "      <td>1.929000</td>\n",
       "      <td>1.407000</td>\n",
       "      <td>2.904000</td>\n",
       "      <td>1.155000</td>\n",
       "      <td>1.404000</td>\n",
       "      <td>1.037000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.257638</td>\n",
       "      <td>12.058814</td>\n",
       "      <td>1.08312</td>\n",
       "      <td>2822.736876</td>\n",
       "      <td>1.580023</td>\n",
       "      <td>1.208306</td>\n",
       "      <td>1.118715</td>\n",
       "      <td>0.70808</td>\n",
       "      <td>0.477706</td>\n",
       "      <td>1.103718</td>\n",
       "      <td>1.050209</td>\n",
       "      <td>11.375469</td>\n",
       "      <td>0.705601</td>\n",
       "      <td>0.531264</td>\n",
       "      <td>0.577654</td>\n",
       "      <td>0.653614</td>\n",
       "      <td>0.362086</td>\n",
       "      <td>0.490943</td>\n",
       "      <td>0.188856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>250.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>19.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>12.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>1365.500000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>27.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>2319.500000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>33.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>3972.250000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>42.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>18424.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>75.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          checking     duration     history        amount      savings  \\\n",
       "count  1000.000000  1000.000000  1000.00000   1000.000000  1000.000000   \n",
       "mean      2.577000    20.903000     2.54500   3271.258000     2.105000   \n",
       "std       1.257638    12.058814     1.08312   2822.736876     1.580023   \n",
       "min       1.000000     4.000000     0.00000    250.000000     1.000000   \n",
       "25%       1.000000    12.000000     2.00000   1365.500000     1.000000   \n",
       "50%       2.000000    18.000000     2.00000   2319.500000     1.000000   \n",
       "75%       4.000000    24.000000     4.00000   3972.250000     3.000000   \n",
       "max       4.000000    72.000000     4.00000  18424.000000     5.000000   \n",
       "\n",
       "          employed     installp     marital        coapp     resident  \\\n",
       "count  1000.000000  1000.000000  1000.00000  1000.000000  1000.000000   \n",
       "mean      3.384000     2.973000     2.68200     1.145000     2.845000   \n",
       "std       1.208306     1.118715     0.70808     0.477706     1.103718   \n",
       "min       1.000000     1.000000     1.00000     1.000000     1.000000   \n",
       "25%       3.000000     2.000000     2.00000     1.000000     2.000000   \n",
       "50%       3.000000     3.000000     3.00000     1.000000     3.000000   \n",
       "75%       5.000000     4.000000     3.00000     1.000000     4.000000   \n",
       "max       5.000000     4.000000     4.00000     3.000000     4.000000   \n",
       "\n",
       "          property          age        other      housing      existcr  \\\n",
       "count  1000.000000  1000.000000  1000.000000  1000.000000  1000.000000   \n",
       "mean      2.358000    35.546000     2.675000     1.929000     1.407000   \n",
       "std       1.050209    11.375469     0.705601     0.531264     0.577654   \n",
       "min       1.000000    19.000000     1.000000     1.000000     1.000000   \n",
       "25%       1.000000    27.000000     3.000000     2.000000     1.000000   \n",
       "50%       2.000000    33.000000     3.000000     2.000000     1.000000   \n",
       "75%       3.000000    42.000000     3.000000     2.000000     2.000000   \n",
       "max       4.000000    75.000000     3.000000     3.000000     4.000000   \n",
       "\n",
       "               job      depends     telephon      foreign  \n",
       "count  1000.000000  1000.000000  1000.000000  1000.000000  \n",
       "mean      2.904000     1.155000     1.404000     1.037000  \n",
       "std       0.653614     0.362086     0.490943     0.188856  \n",
       "min       1.000000     1.000000     1.000000     1.000000  \n",
       "25%       3.000000     1.000000     1.000000     1.000000  \n",
       "50%       3.000000     1.000000     1.000000     1.000000  \n",
       "75%       3.000000     1.000000     2.000000     1.000000  \n",
       "max       4.000000     2.000000     2.000000     2.000000  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "8847966e-4adb-4583-8d5f-6dde387d2b5d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "checking    0\n",
       "duration    0\n",
       "history     0\n",
       "purpose     0\n",
       "amount      0\n",
       "savings     0\n",
       "employed    0\n",
       "installp    0\n",
       "marital     0\n",
       "coapp       0\n",
       "resident    0\n",
       "property    0\n",
       "age         0\n",
       "other       0\n",
       "housing     0\n",
       "existcr     0\n",
       "job         0\n",
       "depends     0\n",
       "telephon    0\n",
       "foreign     0\n",
       "good_bad    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df.isnull().sum()  #检查是否有空白数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "ec428d0e-7d84-4217-9673-1fb307b28aff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      False\n",
       "1      False\n",
       "2      False\n",
       "3      False\n",
       "4      False\n",
       "       ...  \n",
       "995    False\n",
       "996    False\n",
       "997    False\n",
       "998    False\n",
       "999    False\n",
       "Length: 1000, dtype: bool"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df.duplicated()  #找出重复的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "427f49f1-c939-4375-a631-6103249c3151",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking</th>\n",
       "      <th>duration</th>\n",
       "      <th>history</th>\n",
       "      <th>purpose</th>\n",
       "      <th>amount</th>\n",
       "      <th>savings</th>\n",
       "      <th>employed</th>\n",
       "      <th>installp</th>\n",
       "      <th>marital</th>\n",
       "      <th>coapp</th>\n",
       "      <th>...</th>\n",
       "      <th>property</th>\n",
       "      <th>age</th>\n",
       "      <th>other</th>\n",
       "      <th>housing</th>\n",
       "      <th>existcr</th>\n",
       "      <th>job</th>\n",
       "      <th>depends</th>\n",
       "      <th>telephon</th>\n",
       "      <th>foreign</th>\n",
       "      <th>good_bad</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>0 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [checking, duration, history, purpose, amount, savings, employed, installp, marital, coapp, resident, property, age, other, housing, existcr, job, depends, telephon, foreign, good_bad]\n",
       "Index: []\n",
       "\n",
       "[0 rows x 21 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df[credit_df.duplicated()]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "da794533-ceee-4818-926b-50512067b042",
   "metadata": {},
   "source": [
    "## 检查数据套餐：\n",
    "df.info()\n",
    "df.describe()\n",
    "df.isnull().sum()\n",
    "df.duplicated()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "90b77fa4-5cbf-4dcf-a0f6-5b0035f570df",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      1\n",
       "1      2\n",
       "2      4\n",
       "3      1\n",
       "4      1\n",
       "      ..\n",
       "995    4\n",
       "996    1\n",
       "997    4\n",
       "998    1\n",
       "999    2\n",
       "Name: checking, Length: 1000, dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "credit_df.checking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "e42ddf5b-0e25-46e9-ad3f-4302e04ff426",
   "metadata": {},
   "outputs": [],
   "source": [
    "#读热编码\n",
    "\n",
    "#                                            ，标题前缀\n",
    "checking = pd.get_dummies(credit_df.checking,prefix='checking')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "3e44c973-8674-4783-88a4-e65000065f2b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking_1</th>\n",
       "      <th>checking_2</th>\n",
       "      <th>checking_3</th>\n",
       "      <th>checking_4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     checking_1  checking_2  checking_3  checking_4\n",
       "0          True       False       False       False\n",
       "1         False        True       False       False\n",
       "2         False       False       False        True\n",
       "3          True       False       False       False\n",
       "4          True       False       False       False\n",
       "..          ...         ...         ...         ...\n",
       "995       False       False       False        True\n",
       "996        True       False       False       False\n",
       "997       False       False       False        True\n",
       "998        True       False       False       False\n",
       "999       False        True       False       False\n",
       "\n",
       "[1000 rows x 4 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "checking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "d613f702-543d-4f96-847c-ab81603291c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>purpose_0</th>\n",
       "      <th>purpose_1</th>\n",
       "      <th>purpose_2</th>\n",
       "      <th>purpose_3</th>\n",
       "      <th>purpose_4</th>\n",
       "      <th>purpose_5</th>\n",
       "      <th>purpose_6</th>\n",
       "      <th>purpose_8</th>\n",
       "      <th>purpose_9</th>\n",
       "      <th>purpose_X</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   purpose_0  purpose_1  purpose_2  purpose_3  purpose_4  purpose_5  \\\n",
       "0      False      False      False       True      False      False   \n",
       "1      False      False      False       True      False      False   \n",
       "2      False      False      False      False      False      False   \n",
       "3      False      False       True      False      False      False   \n",
       "4       True      False      False      False      False      False   \n",
       "\n",
       "   purpose_6  purpose_8  purpose_9  purpose_X  \n",
       "0      False      False      False      False  \n",
       "1      False      False      False      False  \n",
       "2       True      False      False      False  \n",
       "3      False      False      False      False  \n",
       "4      False      False      False      False  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 把类别型变量进行独热编码(1->N)\n",
    "checking = pd.get_dummies(credit_df.checking,prefix='checking')\n",
    "history = pd.get_dummies(credit_df.history,prefix='history')\n",
    "purpose = pd.get_dummies(credit_df.purpose,prefix='purpose')\n",
    "savings = pd.get_dummies(credit_df.savings,prefix='savings')\n",
    "employed = pd.get_dummies(credit_df.employed,prefix='employed')\n",
    "installp = pd.get_dummies(credit_df.installp,prefix='installp')\n",
    "marital = pd.get_dummies(credit_df.marital,prefix='marital')\n",
    "coapp = pd.get_dummies(credit_df.coapp,prefix='coapp')\n",
    "installp = pd.get_dummies(credit_df.installp,prefix='installp')\n",
    "resident = pd.get_dummies(credit_df.resident,prefix='resident')\n",
    "property = pd.get_dummies(credit_df.property,prefix='property')\n",
    "housing = pd.get_dummies(credit_df.housing,prefix='housing')\n",
    "existcr = pd.get_dummies(credit_df.existcr,prefix='existcr')\n",
    "job = pd.get_dummies(credit_df.job,prefix='job')\n",
    "depends = pd.get_dummies(credit_df.depends,prefix='depends')\n",
    "telephon = pd.get_dummies(credit_df.telephon,prefix='telephon')\n",
    "foreign = pd.get_dummies(credit_df.foreign,prefix='foreign')\n",
    "\n",
    "# 随便查看一个编码后的数据\n",
    "purpose.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "f7e7a23f-827d-4f8a-ad88-f122f8788ede",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>amount</th>\n",
       "      <th>age</th>\n",
       "      <th>checking_1</th>\n",
       "      <th>checking_2</th>\n",
       "      <th>checking_3</th>\n",
       "      <th>checking_4</th>\n",
       "      <th>history_0</th>\n",
       "      <th>history_1</th>\n",
       "      <th>history_2</th>\n",
       "      <th>...</th>\n",
       "      <th>job_1</th>\n",
       "      <th>job_2</th>\n",
       "      <th>job_3</th>\n",
       "      <th>job_4</th>\n",
       "      <th>depends_1</th>\n",
       "      <th>depends_2</th>\n",
       "      <th>telephon_1</th>\n",
       "      <th>telephon_2</th>\n",
       "      <th>foreign_1</th>\n",
       "      <th>foreign_2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>1169</td>\n",
       "      <td>67</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>48</td>\n",
       "      <td>5951</td>\n",
       "      <td>22</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12</td>\n",
       "      <td>2096</td>\n",
       "      <td>49</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>42</td>\n",
       "      <td>7882</td>\n",
       "      <td>45</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24</td>\n",
       "      <td>4870</td>\n",
       "      <td>53</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 72 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   duration  amount  age  checking_1  checking_2  checking_3  checking_4  \\\n",
       "0         6    1169   67        True       False       False       False   \n",
       "1        48    5951   22       False        True       False       False   \n",
       "2        12    2096   49       False       False       False        True   \n",
       "3        42    7882   45        True       False       False       False   \n",
       "4        24    4870   53        True       False       False       False   \n",
       "\n",
       "   history_0  history_1  history_2  ...  job_1  job_2  job_3  job_4  \\\n",
       "0      False      False      False  ...  False  False   True  False   \n",
       "1      False      False       True  ...  False  False   True  False   \n",
       "2      False      False      False  ...  False   True  False  False   \n",
       "3      False      False       True  ...  False  False   True  False   \n",
       "4      False      False      False  ...  False  False   True  False   \n",
       "\n",
       "   depends_1  depends_2  telephon_1  telephon_2  foreign_1  foreign_2  \n",
       "0       True      False       False        True       True      False  \n",
       "1       True      False        True       False       True      False  \n",
       "2      False       True        True       False       True      False  \n",
       "3      False       True        True       False       True      False  \n",
       "4      False       True        True       False       True      False  \n",
       "\n",
       "[5 rows x 72 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 利用concat()函数把转换后的数据对象全部合并在一起变成新的数据对象，命名为trainData_X\n",
    "trainData_X = pd.concat([credit_df.duration, credit_df.amount, credit_df.age, checking, history, purpose, savings, employed, installp, marital, coapp, installp, resident, property, housing, existcr, job, depends, telephon, foreign], axis=1)\n",
    "trainData_X.head()\n",
    "# 72 columns\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "2a82f152-5265-4d3c-a297-3ed5bed9be5e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "target\n",
       "0    700\n",
       "1    300\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 构造目标变量y的数据对象\n",
    "# 当前的目标变量good_bad的取值是'good'和'bad'，一般我们习惯把二分类的类别值编码为0和1，1一般表示类别比较少的那一类，他们往往是我们感兴趣的\n",
    "credit_df['target'] = 0\n",
    "credit_df.loc[(credit_df.good_bad == 'bad'), 'target'] = 1\n",
    "\n",
    "trainData_y = credit_df['target'] \n",
    "trainData_y.value_counts()   #统计不同的值的个数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "223214d6-0c8e-4f7d-8c4e-67473bc81f90",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 拆分训练集和测试集\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(trainData_X, trainData_y, test_size=0.3, random_state=123456)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "eee22be0-4e9f-401c-af01-47a25e0987f0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(700, 72) (300, 72)\n"
     ]
    }
   ],
   "source": [
    "# 查看一下拆分后的train和test的shape，确认拆分正确\n",
    "print(X_train.shape,X_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61224e8d-d91e-489d-a4fe-f579045dd1af",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "e3e7fba0-aaed-4b77-8936-5f46fd3d3d01",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-1 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-1 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-1 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-1 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-1 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression(solver=&#x27;liblinear&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(solver=&#x27;liblinear&#x27;)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "LogisticRegression(solver='liblinear')"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# 通过LogisticRegression类定义一个逻辑回归模型名字叫lr\n",
    "lr = LogisticRegression(solver='liblinear')\n",
    "\n",
    "# 对lr模型进行训练(fit)\n",
    "lr.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "f748922f-a085-485f-aab6-8797762249c7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-2.57923474e-05]\n",
      "[[-6.49846380e-10 -2.09991267e-04 -3.85388758e-04 -1.28894506e-03\n",
      "  -1.92140688e-03 -2.20283701e-06 -8.33498928e-07  1.03823720e-06\n",
      "   5.66066954e-07 -4.16932614e-06 -1.63444709e-06 -1.16315261e-06\n",
      "  -3.11139825e-05 -6.78920911e-06  4.28492459e-04 -1.85264493e-07\n",
      "  -3.46716650e-07 -3.83955616e-07 -1.47704432e-07 -5.20718431e-07\n",
      "  -8.73379491e-08 -1.97043955e-04 -4.90186742e-04 -1.20691514e-03\n",
      "   1.96344776e-03 -2.85069656e-06 -1.10192133e-06  1.11643052e-06\n",
      "   1.45106233e-07 -6.01808678e-06 -1.83820447e-06 -4.70468965e-05\n",
      "   2.12545491e-05 -4.70468965e-05  2.12545491e-05]]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>coef</th>\n",
       "      <th>columns</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[-6.498463801084951e-10]</td>\n",
       "      <td>842302</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[-0.00020999126742133318]</td>\n",
       "      <td>17.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[-0.000385388758125898]</td>\n",
       "      <td>10.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[-0.0012889450582046521]</td>\n",
       "      <td>122.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[-0.001921406882053203]</td>\n",
       "      <td>1001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>[-2.202837009819872e-06]</td>\n",
       "      <td>0.1184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>[-8.334989280661177e-07]</td>\n",
       "      <td>0.2776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>[1.0382372003163258e-06]</td>\n",
       "      <td>0.3001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>[5.66066953825104e-07]</td>\n",
       "      <td>0.1471</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>[-4.16932613508626e-06]</td>\n",
       "      <td>0.2419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>[-1.634447092021541e-06]</td>\n",
       "      <td>0.07871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>[-1.1631526091791624e-06]</td>\n",
       "      <td>1.095</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>[-3.111398249939994e-05]</td>\n",
       "      <td>0.9053</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>[-6.789209109443947e-06]</td>\n",
       "      <td>8.589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>[0.00042849245871541623]</td>\n",
       "      <td>153.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>[-1.8526449305401452e-07]</td>\n",
       "      <td>0.006399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>[-3.4671665001186047e-07]</td>\n",
       "      <td>0.04904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>[-3.839556164883393e-07]</td>\n",
       "      <td>0.05373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>[-1.4770443165920316e-07]</td>\n",
       "      <td>0.01587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>[-5.207184314349898e-07]</td>\n",
       "      <td>0.03003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>[-8.733794913967671e-08]</td>\n",
       "      <td>0.006193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>[-0.00019704395536422148]</td>\n",
       "      <td>25.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>[-0.0004901867424322411]</td>\n",
       "      <td>17.33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>[-0.0012069151439974548]</td>\n",
       "      <td>184.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>[0.0019634477585179838]</td>\n",
       "      <td>2019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>[-2.850696558830879e-06]</td>\n",
       "      <td>0.1622</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>[-1.1019213326120733e-06]</td>\n",
       "      <td>0.6656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>[1.1164305208223171e-06]</td>\n",
       "      <td>0.7119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>[1.451062328122699e-07]</td>\n",
       "      <td>0.2654</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>[-6.018086784298902e-06]</td>\n",
       "      <td>0.4601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>[-1.8382044670486964e-06]</td>\n",
       "      <td>0.1189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>[-4.7046896508097187e-05]</td>\n",
       "      <td>M_B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>[2.125454909793857e-05]</td>\n",
       "      <td>M_M</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>[-4.7046896508097187e-05]</td>\n",
       "      <td>M_B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>[2.125454909793857e-05]</td>\n",
       "      <td>M_M</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         coef   columns\n",
       "0    [-6.498463801084951e-10]    842302\n",
       "1   [-0.00020999126742133318]     17.99\n",
       "2     [-0.000385388758125898]     10.38\n",
       "3    [-0.0012889450582046521]     122.8\n",
       "4     [-0.001921406882053203]      1001\n",
       "5    [-2.202837009819872e-06]    0.1184\n",
       "6    [-8.334989280661177e-07]    0.2776\n",
       "7    [1.0382372003163258e-06]    0.3001\n",
       "8      [5.66066953825104e-07]    0.1471\n",
       "9     [-4.16932613508626e-06]    0.2419\n",
       "10   [-1.634447092021541e-06]   0.07871\n",
       "11  [-1.1631526091791624e-06]     1.095\n",
       "12   [-3.111398249939994e-05]    0.9053\n",
       "13   [-6.789209109443947e-06]     8.589\n",
       "14   [0.00042849245871541623]     153.4\n",
       "15  [-1.8526449305401452e-07]  0.006399\n",
       "16  [-3.4671665001186047e-07]   0.04904\n",
       "17   [-3.839556164883393e-07]   0.05373\n",
       "18  [-1.4770443165920316e-07]   0.01587\n",
       "19   [-5.207184314349898e-07]   0.03003\n",
       "20   [-8.733794913967671e-08]  0.006193\n",
       "21  [-0.00019704395536422148]     25.38\n",
       "22   [-0.0004901867424322411]     17.33\n",
       "23   [-0.0012069151439974548]     184.6\n",
       "24    [0.0019634477585179838]      2019\n",
       "25   [-2.850696558830879e-06]    0.1622\n",
       "26  [-1.1019213326120733e-06]    0.6656\n",
       "27   [1.1164305208223171e-06]    0.7119\n",
       "28    [1.451062328122699e-07]    0.2654\n",
       "29   [-6.018086784298902e-06]    0.4601\n",
       "30  [-1.8382044670486964e-06]    0.1189\n",
       "31  [-4.7046896508097187e-05]       M_B\n",
       "32    [2.125454909793857e-05]       M_M\n",
       "33  [-4.7046896508097187e-05]       M_B\n",
       "34    [2.125454909793857e-05]       M_M"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看模型结果\n",
    "print(lr.intercept_ )\n",
    "print(lr.coef_)\n",
    "\n",
    "# 把变量名称和系数对应在一起方便查看\n",
    "pd.DataFrame(list(zip(np.transpose(lr.coef_), X_train.columns)), columns=['coef', 'columns'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "bbdb4610-5734-49be-ba00-9a4c6983efd5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-4 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-4 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-4 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-4 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-4 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-4 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-4 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-4 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-4 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-4 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-4 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-4 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-4 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression(C=1, solver=&#x27;liblinear&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(C=1, solver=&#x27;liblinear&#x27;)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "LogisticRegression(C=1, solver='liblinear')"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "22f9e021-f1ec-4b30-85e3-3d499897d8ba",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "509f6e46-6280-47de-b75a-06f56a20cefc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-0.29172704]\n",
      "[[ 2.83466982e-02  1.11552652e-04 -1.32186963e-02  7.30302631e-01\n",
      "   1.89698171e-01 -1.48413761e-01 -1.06331408e+00  5.52943180e-01\n",
      "   7.00967840e-01 -1.95923164e-01 -5.08295211e-01 -8.41419686e-01\n",
      "   5.78994877e-01 -1.02765832e+00 -5.19006858e-01 -3.10160670e-01\n",
      "   8.35543077e-02  4.42862984e-01  9.71425233e-01 -4.01707082e-02\n",
      "  -2.75685722e-01 -1.95882164e-01  4.20838317e-01  2.60894368e-01\n",
      "  -1.99076065e-02 -4.51038599e-01 -5.02513521e-01  4.00136767e-03\n",
      "   1.67240631e-01 -6.34897466e-03 -4.30207673e-01 -2.64123928e-02\n",
      "  -3.24743571e-01 -1.61721874e-01  9.33190965e-02  1.01419308e-01\n",
      "   2.42408395e-01  4.83753075e-02 -4.54944815e-01 -1.27565929e-01\n",
      "  -7.68563952e-02  3.92533348e-01 -6.07403995e-01 -3.24743571e-01\n",
      "  -1.61721874e-01  9.33190965e-02  1.01419308e-01 -4.82693136e-01\n",
      "   3.70446525e-01 -1.35055794e-02 -1.65974851e-01 -2.38577143e-01\n",
      "   2.64573097e-02 -4.20541305e-02 -3.75530774e-02  2.21797288e-01\n",
      "  -3.75507125e-01 -1.38017204e-01 -3.65479382e-01 -7.63090770e-03\n",
      "  -1.44427173e-01  2.25810421e-01 -4.82077729e-02 -1.39161870e-01\n",
      "   6.11596796e-02 -1.65517078e-01 -3.55525185e-01  6.37981434e-02\n",
      "   2.23334828e-02 -3.14060524e-01  5.70464532e-01 -8.62191573e-01]]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>coef</th>\n",
       "      <th>columns</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[0.028346698229942596]</td>\n",
       "      <td>duration</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[0.00011155265231640791]</td>\n",
       "      <td>amount</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[-0.013218696255764923]</td>\n",
       "      <td>age</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[0.730302631292299]</td>\n",
       "      <td>checking_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[0.18969817092559277]</td>\n",
       "      <td>checking_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>[0.06379814341236087]</td>\n",
       "      <td>depends_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>[0.022333482818731164]</td>\n",
       "      <td>telephon_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>[-0.3140605242224358]</td>\n",
       "      <td>telephon_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>[0.57046453204328]</td>\n",
       "      <td>foreign_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>[-0.8621915734469889]</td>\n",
       "      <td>foreign_2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>72 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                        coef     columns\n",
       "0     [0.028346698229942596]    duration\n",
       "1   [0.00011155265231640791]      amount\n",
       "2    [-0.013218696255764923]         age\n",
       "3        [0.730302631292299]  checking_1\n",
       "4      [0.18969817092559277]  checking_2\n",
       "..                       ...         ...\n",
       "67     [0.06379814341236087]   depends_2\n",
       "68    [0.022333482818731164]  telephon_1\n",
       "69     [-0.3140605242224358]  telephon_2\n",
       "70        [0.57046453204328]   foreign_1\n",
       "71     [-0.8621915734469889]   foreign_2\n",
       "\n",
       "[72 rows x 2 columns]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "# 查看模型结果\n",
    "print(lr.intercept_ )\n",
    "print(lr.coef_)\n",
    "\n",
    "# 把变量名称和系数对应在一起方便查看\n",
    "pd.DataFrame(list(zip(np.transpose(lr.coef_), X_train.columns)), columns=['coef', 'columns'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "56e5b3c7-5dc9-473c-995d-502e49aa6961",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression(C=1, solver='liblinear')\n",
      "0.7628571428571428\n",
      "{'C': 1, 'penalty': 'l2'}\n"
     ]
    }
   ],
   "source": [
    "# grid search调参\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "parameters = {\n",
    "    'penalty': ('l1', 'l2'),\n",
    "    'C': (0.01, 0.1, 0.5 , 1,5, 10),\n",
    "}\n",
    "\n",
    "lr = LogisticRegression(solver='liblinear')\n",
    "lr_search = GridSearchCV(lr, parameters, scoring='accuracy', cv=5)\n",
    "lr_search.fit(X_train, y_train)\n",
    "\n",
    "#查看最佳结果\n",
    "print(lr_search.best_estimator_)\n",
    "print(lr_search.best_score_)\n",
    "print(lr_search.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "fbef9e4c-5bfd-47bb-a633-d742ac37f968",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.93      0.74      0.82       111\n",
      "           1       0.65      0.90      0.76        60\n",
      "\n",
      "    accuracy                           0.80       171\n",
      "   macro avg       0.79      0.82      0.79       171\n",
      "weighted avg       0.83      0.80      0.80       171\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "# 利用模型对测试集进行预测，输出target预测标签值和概率\n",
    "y_test_pred = lr.predict(X_test)\n",
    "y_test_prob = lr.predict_proba(X_test)\n",
    "\n",
    "# 分类评估汇总报告classification_report\n",
    "print(classification_report(y_test,y_test_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2758d7e2-5adb-4d4f-ae62-2947de3335dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 误分类矩阵 confusion_matrix\n",
    "print(confusion_matrix(y_test,y_test_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02f8c84d-7700-430d-b1ba-7efce1848a11",
   "metadata": {},
   "outputs": [],
   "source": [
    "    0     1    \n",
    "0  178    32      TP   TN\n",
    "1   39    51      FP   FN\n",
    "\n",
    "数据准备  （读入数据，检查数据，one-hot，检查Target）\n",
    "构建数据集 （train_test_split 训练集， 测试集）\n",
    "建模\n",
    "评价\n",
    "混淆矩阵：纵向是增值，水平向是"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "49e8acd9-6d99-4026-9957-5aeaf99820a3",
   "metadata": {},
   "source": [
    "## 作业"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "7b3c9311-ca8b-4d0d-b0e8-f6361dd23d5e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 568 entries, 0 to 567\n",
      "Data columns (total 32 columns):\n",
      " #   Column    Non-Null Count  Dtype  \n",
      "---  ------    --------------  -----  \n",
      " 0   842302    568 non-null    int64  \n",
      " 1   M         568 non-null    object \n",
      " 2   17.99     568 non-null    float64\n",
      " 3   10.38     568 non-null    float64\n",
      " 4   122.8     568 non-null    float64\n",
      " 5   1001      568 non-null    float64\n",
      " 6   0.1184    568 non-null    float64\n",
      " 7   0.2776    568 non-null    float64\n",
      " 8   0.3001    568 non-null    float64\n",
      " 9   0.1471    568 non-null    float64\n",
      " 10  0.2419    568 non-null    float64\n",
      " 11  0.07871   568 non-null    float64\n",
      " 12  1.095     568 non-null    float64\n",
      " 13  0.9053    568 non-null    float64\n",
      " 14  8.589     568 non-null    float64\n",
      " 15  153.4     568 non-null    float64\n",
      " 16  0.006399  568 non-null    float64\n",
      " 17  0.04904   568 non-null    float64\n",
      " 18  0.05373   568 non-null    float64\n",
      " 19  0.01587   568 non-null    float64\n",
      " 20  0.03003   568 non-null    float64\n",
      " 21  0.006193  568 non-null    float64\n",
      " 22  25.38     568 non-null    float64\n",
      " 23  17.33     568 non-null    float64\n",
      " 24  184.6     568 non-null    float64\n",
      " 25  2019      568 non-null    float64\n",
      " 26  0.1622    568 non-null    float64\n",
      " 27  0.6656    568 non-null    float64\n",
      " 28  0.7119    568 non-null    float64\n",
      " 29  0.2654    568 non-null    float64\n",
      " 30  0.4601    568 non-null    float64\n",
      " 31  0.1189    568 non-null    float64\n",
      "dtypes: float64(30), int64(1), object(1)\n",
      "memory usage: 142.1+ KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>842302</th>\n",
       "      <th>M</th>\n",
       "      <th>17.99</th>\n",
       "      <th>10.38</th>\n",
       "      <th>122.8</th>\n",
       "      <th>1001</th>\n",
       "      <th>0.1184</th>\n",
       "      <th>0.2776</th>\n",
       "      <th>0.3001</th>\n",
       "      <th>0.1471</th>\n",
       "      <th>...</th>\n",
       "      <th>25.38</th>\n",
       "      <th>17.33</th>\n",
       "      <th>184.6</th>\n",
       "      <th>2019</th>\n",
       "      <th>0.1622</th>\n",
       "      <th>0.6656</th>\n",
       "      <th>0.7119</th>\n",
       "      <th>0.2654</th>\n",
       "      <th>0.4601</th>\n",
       "      <th>0.1189</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>0 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [842302, M, 17.99, 10.38, 122.8, 1001, 0.1184, 0.2776, 0.3001, 0.1471, 0.2419, 0.07871, 1.095, 0.9053, 8.589, 153.4, 0.006399, 0.04904, 0.05373, 0.01587, 0.03003, 0.006193, 25.38, 17.33, 184.6, 2019, 0.1622, 0.6656, 0.7119, 0.2654, 0.4601, 0.1189]\n",
       "Index: []\n",
       "\n",
       "[0 rows x 32 columns]"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.metrics import auc\n",
    "df = pd.read_csv(\"wdbcdata.txt\")    #读取数据\n",
    "                                    #检查数据\n",
    "df.info()\n",
    "df.describe()\n",
    "df.isnull().sum()\n",
    "df.duplicated()\n",
    "df[df.duplicated()]  #"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "b216a9ac-71ba-4bcd-ba9d-80cc78b87f2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 通过数据对象的describe()函数进行简单的描述性统计分析--均值，标准差，分位数\n",
    "# 通过value_counts()函数对类别型变量进行频数统计\n",
    "#print(df.M.value_counts()) \n",
    "\n",
    "# 利用concat()函数把转换后的数据对象全部合并在一起变成新的数据对象，命名为trainData_X\n",
    "trainData_X = pd.concat([\n",
    "    df['842302'],\n",
    "    df['M'],\n",
    "    df['17.99'],\n",
    "    df['10.38'],\n",
    "    df['122.8'],\n",
    "    df['1001'],\n",
    "    df['0.1184'],\n",
    "    df['0.2776'],\n",
    "    df['0.3001'],\n",
    "    df['0.1471'],\n",
    "    df['0.2419'],\n",
    "    df['0.07871'],\n",
    "    df['1.095'],\n",
    "    df['0.9053'],\n",
    "    df['8.589'],\n",
    "    df['153.4'],\n",
    "    df['0.006399'],\n",
    "    df['0.04904'],\n",
    "    df['0.05373'],\n",
    "    df['0.01587'],\n",
    "    df['0.03003'],\n",
    "    df['0.006193'],\n",
    "    df['25.38'],\n",
    "    df['17.33'],\n",
    "    df['184.6'],\n",
    "    df['2019'],\n",
    "    df['0.1622'],\n",
    "    df['0.6656'],\n",
    "    df['0.7119'],\n",
    "    df['0.2654'],\n",
    "    df['0.4601'],\n",
    "    df['0.1189']\n",
    "], axis=1)\n",
    "trainData_X.head()\n",
    "\n",
    "# 构造目标变量y的数据对象\n",
    "# 当前的目标变量good_bad的取值是'good'和'bad'，一般我们习惯把二分类的类别值编码为0和1，1一般表示类别比较少的那一类，他们往往是我们感兴趣的\n",
    "df['target'] = 0\n",
    "df.loc[(df.M == 'target'), 'target'] = 1\n",
    "\n",
    "trainData_y = df['target'] \n",
    "trainData_y.value_counts()             #统计不同的值的个数\n",
    "\n",
    "\n",
    "# 拆分训练集和测试集\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(trainData_X, trainData_y, test_size=0.3, random_state=123456)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "ba62638c-e93b-4cf2-bdf6-8be64eb585a7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "target\n",
       "0    357\n",
       "1    211\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['target'] = 0\n",
    "df.loc[(df.M == 'M'), 'target'] = 1\n",
    "\n",
    "trainData_y = df['target'] \n",
    "trainData_y.value_counts() "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "670b2b74-dd05-4287-9c76-05dbb8123188",
   "metadata": {},
   "source": [
    "## Step3：模型训练和调参\n",
    "选择一个分类算法\n",
    "通过fit()训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "3255870e-7db7-4a96-9deb-81093bf721b1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(397, 35) (171, 35)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-3 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-3 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-3 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-3 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-3 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-3 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-3 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-3 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-3 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-3 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression(solver=&#x27;liblinear&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" checked><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(solver=&#x27;liblinear&#x27;)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "LogisticRegression(solver='liblinear')"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对'M'列进行独热编码，如果不转变成读热编码会出现无法读取字符串的问题\n",
    "encoder = OneHotEncoder()\n",
    "M_encoded = encoder.fit_transform(df[['M']])\n",
    "M_encoded_df = pd.DataFrame(M_encoded.toarray(), columns=encoder.get_feature_names_out(['M']))\n",
    "\n",
    "# 将编码后的列合并回原始特征数据\n",
    "X_train = pd.concat([X_train, M_encoded_df.loc[X_train.index]], axis=1)\n",
    "X_test = pd.concat([X_test, M_encoded_df.loc[X_test.index]], axis=1)\n",
    "\n",
    "print(X_train.shape, X_test.shape)\n",
    "\n",
    "# 通过LogisticRegression类定义一个逻辑回归模型名字叫lr\n",
    "lr = LogisticRegression(solver='liblinear')\n",
    "\n",
    "# 对lr模型进行训练(fit)\n",
    "lr.fit(X_train, y_train)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "6f922918-b287-4ba4-83c3-27b734ce0252",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-2.57923474e-05]\n",
      "[[-6.49846380e-10 -2.09991267e-04 -3.85388758e-04 -1.28894506e-03\n",
      "  -1.92140688e-03 -2.20283701e-06 -8.33498928e-07  1.03823720e-06\n",
      "   5.66066954e-07 -4.16932614e-06 -1.63444709e-06 -1.16315261e-06\n",
      "  -3.11139825e-05 -6.78920911e-06  4.28492459e-04 -1.85264493e-07\n",
      "  -3.46716650e-07 -3.83955616e-07 -1.47704432e-07 -5.20718431e-07\n",
      "  -8.73379491e-08 -1.97043955e-04 -4.90186742e-04 -1.20691514e-03\n",
      "   1.96344776e-03 -2.85069656e-06 -1.10192133e-06  1.11643052e-06\n",
      "   1.45106233e-07 -6.01808678e-06 -1.83820447e-06 -4.70468965e-05\n",
      "   2.12545491e-05 -4.70468965e-05  2.12545491e-05]]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>coef</th>\n",
       "      <th>columns</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[-6.498463801084951e-10]</td>\n",
       "      <td>842302</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[-0.00020999126742133318]</td>\n",
       "      <td>17.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[-0.000385388758125898]</td>\n",
       "      <td>10.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[-0.0012889450582046521]</td>\n",
       "      <td>122.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[-0.001921406882053203]</td>\n",
       "      <td>1001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>[-2.202837009819872e-06]</td>\n",
       "      <td>0.1184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>[-8.334989280661177e-07]</td>\n",
       "      <td>0.2776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>[1.0382372003163258e-06]</td>\n",
       "      <td>0.3001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>[5.66066953825104e-07]</td>\n",
       "      <td>0.1471</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>[-4.16932613508626e-06]</td>\n",
       "      <td>0.2419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>[-1.634447092021541e-06]</td>\n",
       "      <td>0.07871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>[-1.1631526091791624e-06]</td>\n",
       "      <td>1.095</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>[-3.111398249939994e-05]</td>\n",
       "      <td>0.9053</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>[-6.789209109443947e-06]</td>\n",
       "      <td>8.589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>[0.00042849245871541623]</td>\n",
       "      <td>153.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>[-1.8526449305401452e-07]</td>\n",
       "      <td>0.006399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>[-3.4671665001186047e-07]</td>\n",
       "      <td>0.04904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>[-3.839556164883393e-07]</td>\n",
       "      <td>0.05373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>[-1.4770443165920316e-07]</td>\n",
       "      <td>0.01587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>[-5.207184314349898e-07]</td>\n",
       "      <td>0.03003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>[-8.733794913967671e-08]</td>\n",
       "      <td>0.006193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>[-0.00019704395536422148]</td>\n",
       "      <td>25.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>[-0.0004901867424322411]</td>\n",
       "      <td>17.33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>[-0.0012069151439974548]</td>\n",
       "      <td>184.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>[0.0019634477585179838]</td>\n",
       "      <td>2019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>[-2.850696558830879e-06]</td>\n",
       "      <td>0.1622</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>[-1.1019213326120733e-06]</td>\n",
       "      <td>0.6656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>[1.1164305208223171e-06]</td>\n",
       "      <td>0.7119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>[1.451062328122699e-07]</td>\n",
       "      <td>0.2654</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>[-6.018086784298902e-06]</td>\n",
       "      <td>0.4601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>[-1.8382044670486964e-06]</td>\n",
       "      <td>0.1189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>[-4.7046896508097187e-05]</td>\n",
       "      <td>M_B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>[2.125454909793857e-05]</td>\n",
       "      <td>M_M</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>[-4.7046896508097187e-05]</td>\n",
       "      <td>M_B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>[2.125454909793857e-05]</td>\n",
       "      <td>M_M</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         coef   columns\n",
       "0    [-6.498463801084951e-10]    842302\n",
       "1   [-0.00020999126742133318]     17.99\n",
       "2     [-0.000385388758125898]     10.38\n",
       "3    [-0.0012889450582046521]     122.8\n",
       "4     [-0.001921406882053203]      1001\n",
       "5    [-2.202837009819872e-06]    0.1184\n",
       "6    [-8.334989280661177e-07]    0.2776\n",
       "7    [1.0382372003163258e-06]    0.3001\n",
       "8      [5.66066953825104e-07]    0.1471\n",
       "9     [-4.16932613508626e-06]    0.2419\n",
       "10   [-1.634447092021541e-06]   0.07871\n",
       "11  [-1.1631526091791624e-06]     1.095\n",
       "12   [-3.111398249939994e-05]    0.9053\n",
       "13   [-6.789209109443947e-06]     8.589\n",
       "14   [0.00042849245871541623]     153.4\n",
       "15  [-1.8526449305401452e-07]  0.006399\n",
       "16  [-3.4671665001186047e-07]   0.04904\n",
       "17   [-3.839556164883393e-07]   0.05373\n",
       "18  [-1.4770443165920316e-07]   0.01587\n",
       "19   [-5.207184314349898e-07]   0.03003\n",
       "20   [-8.733794913967671e-08]  0.006193\n",
       "21  [-0.00019704395536422148]     25.38\n",
       "22   [-0.0004901867424322411]     17.33\n",
       "23   [-0.0012069151439974548]     184.6\n",
       "24    [0.0019634477585179838]      2019\n",
       "25   [-2.850696558830879e-06]    0.1622\n",
       "26  [-1.1019213326120733e-06]    0.6656\n",
       "27   [1.1164305208223171e-06]    0.7119\n",
       "28    [1.451062328122699e-07]    0.2654\n",
       "29   [-6.018086784298902e-06]    0.4601\n",
       "30  [-1.8382044670486964e-06]    0.1189\n",
       "31  [-4.7046896508097187e-05]       M_B\n",
       "32    [2.125454909793857e-05]       M_M\n",
       "33  [-4.7046896508097187e-05]       M_B\n",
       "34    [2.125454909793857e-05]       M_M"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看模型结果\n",
    "print(lr.intercept_ )\n",
    "print(lr.coef_)\n",
    "\n",
    "# 把变量名称和系数对应在一起方便查看\n",
    "pd.DataFrame(list(zip(np.transpose(lr.coef_), X_train.columns)), columns=['coef', 'columns'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1a638d9-c302-4762-a11f-172f581e5cce",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "26c6d3e1-a09d-4966-904b-ce8467d40e4e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "f:\\anaconda3\\Lib\\site-packages\\sklearn\\svm\\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  warnings.warn(\n",
      "f:\\anaconda3\\Lib\\site-packages\\sklearn\\svm\\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  warnings.warn(\n",
      "f:\\anaconda3\\Lib\\site-packages\\sklearn\\svm\\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  warnings.warn(\n",
      "f:\\anaconda3\\Lib\\site-packages\\sklearn\\svm\\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  warnings.warn(\n",
      "f:\\anaconda3\\Lib\\site-packages\\sklearn\\svm\\_base.py:1237: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression(C=0.5, penalty='l1', solver='liblinear')\n",
      "1.0\n",
      "{'C': 0.5, 'penalty': 'l1'}\n"
     ]
    }
   ],
   "source": [
    "# grid search调参\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "parameters = {\n",
    "    'penalty': ('l1', 'l2'),\n",
    "    'C': (0.01, 0.1, 0.5 , 1,5, 10),\n",
    "}\n",
    "\n",
    "lr = LogisticRegression(solver='liblinear')\n",
    "lr_search = GridSearchCV(lr, parameters, scoring='accuracy', cv=5)\n",
    "lr_search.fit(X_train, y_train)\n",
    "\n",
    "#查看最佳结果\n",
    "print(lr_search.best_estimator_)\n",
    "print(lr_search.best_score_)\n",
    "print(lr_search.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "47b5be53-d9eb-4027-a03f-3e93bf6274fb",
   "metadata": {},
   "source": [
    "## 评估模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "ced643ee-0318-4ab9-8937-9a79f0f8e561",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "f4db0d43-72b7-4c07-88e3-0d7799aa8504",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      1.00      1.00       111\n",
      "           1       1.00      1.00      1.00        60\n",
      "\n",
      "    accuracy                           1.00       171\n",
      "   macro avg       1.00      1.00      1.00       171\n",
      "weighted avg       1.00      1.00      1.00       171\n",
      "\n",
      "[[111   0]\n",
      " [  0  60]]\n"
     ]
    }
   ],
   "source": [
    "# 利用模型对测试集进行预测，输出target预测标签值和概率\n",
    "y_test_pred = lr_search.predict(X_test)\n",
    "y_test_prob = lr_search.predict_proba(X_test)\n",
    "\n",
    "# 输出分类报告和混淆矩阵\n",
    "print(classification_report(y_test, y_test_pred))\n",
    "print(confusion_matrix(y_test, y_test_pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c88428d5-f643-4d69-9549-445738da8b20",
   "metadata": {},
   "source": [
    "## 85分钟"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a4bdf9d-62b4-4c84-a071-e1371b07bb67",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
